{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "ff724738",
   "metadata": {},
   "outputs": [],
   "source": [
    "import jieba\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "d892deac",
   "metadata": {},
   "outputs": [],
   "source": [
    "keyword_cn_df = pd.read_csv('../datasets/重写查询词1.csv', encoding='utf-8-sig', header=None)\n",
    "qr1_cn_df = pd.read_csv('../datasets/query_rewrite_1.csv', encoding='utf-8-sig', header=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "b63c7230",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "180648"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(keyword_cn_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "ae3a642e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "180648"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(qr1_cn_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "df70e0bf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>query</th>\n",
       "      <th>rewrite_01</th>\n",
       "      <th>rewrite_02</th>\n",
       "      <th>rewrite_03</th>\n",
       "      <th>rewrite_04</th>\n",
       "      <th>rewrite_05</th>\n",
       "      <th>rewrite_06</th>\n",
       "      <th>rewrite_07</th>\n",
       "      <th>rewrite_08</th>\n",
       "      <th>rewrite_09</th>\n",
       "      <th>rewrite_10</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>01驱动器</td>\n",
       "      <td>假面骑士01驱动器</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>03echoo</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>055驱逐舰模型</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>05单兵饭盒</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0w一20全合成机油</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0一3个月婴儿帽子</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0一3个月婴儿上衣</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>0一3个月婴儿用品</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>0一3婴儿衣服冬装</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>0一3月婴儿用品</td>\n",
       "      <td>0一3个月婴儿用品</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        query rewrite_01 rewrite_02 rewrite_03 rewrite_04 rewrite_05  \\\n",
       "0       01驱动器  假面骑士01驱动器        NaN        NaN        NaN        NaN   \n",
       "1     03echoo        NaN        NaN        NaN        NaN        NaN   \n",
       "2    055驱逐舰模型        NaN        NaN        NaN        NaN        NaN   \n",
       "3      05单兵饭盒        NaN        NaN        NaN        NaN        NaN   \n",
       "4  0w一20全合成机油        NaN        NaN        NaN        NaN        NaN   \n",
       "5   0一3个月婴儿帽子        NaN        NaN        NaN        NaN        NaN   \n",
       "6   0一3个月婴儿上衣        NaN        NaN        NaN        NaN        NaN   \n",
       "7   0一3个月婴儿用品        NaN        NaN        NaN        NaN        NaN   \n",
       "8   0一3婴儿衣服冬装        NaN        NaN        NaN        NaN        NaN   \n",
       "9    0一3月婴儿用品  0一3个月婴儿用品        NaN        NaN        NaN        NaN   \n",
       "\n",
       "  rewrite_06 rewrite_07 rewrite_08 rewrite_09 rewrite_10  \n",
       "0        NaN        NaN        NaN        NaN        NaN  \n",
       "1        NaN        NaN        NaN        NaN        NaN  \n",
       "2        NaN        NaN        NaN        NaN        NaN  \n",
       "3        NaN        NaN        NaN        NaN        NaN  \n",
       "4        NaN        NaN        NaN        NaN        NaN  \n",
       "5        NaN        NaN        NaN        NaN        NaN  \n",
       "6        NaN        NaN        NaN        NaN        NaN  \n",
       "7        NaN        NaN        NaN        NaN        NaN  \n",
       "8        NaN        NaN        NaN        NaN        NaN  \n",
       "9        NaN        NaN        NaN        NaN        NaN  "
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "qr1_cn_df[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "402edaf1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>01驱动器</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>03echoo</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>055驱逐舰模型</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>05单兵饭盒</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0w一20全合成机油</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0一3个月婴儿帽子</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0一3个月婴儿上衣</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>0一3个月婴儿用品</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>0一3婴儿衣服冬装</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>0一3月婴儿用品</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            0\n",
       "0       01驱动器\n",
       "1     03echoo\n",
       "2    055驱逐舰模型\n",
       "3      05单兵饭盒\n",
       "4  0w一20全合成机油\n",
       "5   0一3个月婴儿帽子\n",
       "6   0一3个月婴儿上衣\n",
       "7   0一3个月婴儿用品\n",
       "8   0一3婴儿衣服冬装\n",
       "9    0一3月婴儿用品"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "keyword_cn_df[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c56c76b1",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
